# Code
# Third-party imports. The flattened notebook export listed this group twice
# and fused it with the following statements; each name is imported once here.
import pandas as pd
import altair as alt
import seaborn as sns
import plotly.express as px
from vega_datasets import data
import matplotlib.pyplot as plt

# Load clean_data.csv; the path is relative to the current working directory.
df = pd.read_csv('../data/clean_data.csv')

# Colour ramp passed to both the Plotly and the Altair maps further down.
custom_palette = ['#00072D', '#0A2472', '#0E6BA8', '#A6E1FA', '#99ABC5', '#8B748F', '#6F0624']

# calculate averages of all numeric columns
# Mean of each selected numeric column, grouped by state.
num_cols = df[['State', 'GPA', 'WorkExp', 'TestScore', 'WritingScore', 'VolunteerLevel']]
avg_df = num_cols.groupby('State').mean().reset_index()

# Full state name -> two-letter abbreviation (the Plotly map below uses
# locationmode="USA-states", which expects abbreviations).
state_abbr = {
    'Alabama': 'AL',
    'California': 'CA',
    'Colorado': 'CO',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Mississippi': 'MS',
    'New York': 'NY',
    'Oregon': 'OR',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virginia': 'VA',
}

# Replace full names with abbreviations. Adding the mapped column and then
# dropping the original leaves 'State' as the LAST column; keep this order if
# anything downstream slices the columns by position.
# Any state name not present in state_abbr becomes NaN here.
avg_df['State_Abbr'] = avg_df['State'].map(state_abbr)
avg_df = avg_df.drop(columns=['State'])
avg_df = avg_df.rename(columns={'State_Abbr': 'State'})

# Needed by the renderer configuration that follows.
import plotly.io as pio
# This ensures Plotly output works in multiple places:
# plotly_mimetype: VS Code notebook UI
# notebook: "Jupyter: Export to HTML" command in VS Code
# See https://plotly.com/python/renderers/#multiple-renderers
pio.renderers.default = "plotly_mimetype+notebook"

# Choropleth of the per-state averages; GPA is the variable shown initially.
# Columns are referenced by name because avg_df is passed as the data frame.
fig = px.choropleth(
    avg_df,
    locationmode="USA-states",
    locations="State",
    scope="usa",
    color="GPA",
    hover_data={"State": True, "GPA": True},
    labels={"GPA": "Selected Variable"},
    color_continuous_scale=custom_palette,
)

# One dropdown entry per averaged column; choosing it swaps the trace's z
# values. Columns are selected by name rather than with a positional slice so
# that a change in column order can never put 'State' into the menu.
dropdown = [
    {'label': col, 'method': 'update', 'args': [{'z': [avg_df[col]]}]}
    for col in avg_df.columns
    if col != 'State'
]

fig.update_layout(
    updatemenus=[{'buttons': dropdown, 'direction': 'down', 'showactive': True}],
    title='Choropleth Map of Average Selected Variable',
)
fig.update_coloraxes(colorbar_title=dict(text='Selected Variable'))
fig.show()

# create dataframe of rates for each state by decision
# Row counts per (Decision, State). count() is taken on GPA, so rows whose GPA
# is null are not counted.
decision_state = df.groupby(['Decision', 'State'])[["GPA"]].count().reset_index()
decision_state = decision_state.rename(columns={'GPA': 'StateCount'})

# DecisionCount is the total across states for the same Decision, so Rate is
# the state's percentage share of all rows with that Decision.
decision_state['DecisionCount'] = decision_state.groupby('Decision')['StateCount'].transform('sum')
decision_state['Rate'] = decision_state['StateCount'] / decision_state['DecisionCount'] * 100

# Load the reference dataset once instead of once per column.
# Presumably its "state" column holds full state names matching df['State'] and
# its "id" values match the feature ids in us-10m.json — TODO confirm.
state_ids = data.population_engineers_hurricanes()
state_id_dict = dict(zip(state_ids["state"], state_ids["id"]))
decision_state["StateID"] = decision_state["State"].map(state_id_dict)

admit_states = decision_state[decision_state['Decision'] == "Admit"]
decline_states = decision_state[decision_state['Decision'] == "Decline"]

# State outlines used as the base geometry of the Altair maps.
states = alt.topo_feature('https://raw.githubusercontent.com/vega/vega-datasets/master/data/us-10m.json', 'states')
# Selection shared by every layer of both maps.
# NOTE(review): selection_multi / add_selection are the Altair 4 spellings;
# Altair 5 renames them (selection_point / add_params) — confirm the pinned
# version before changing them.
click = alt.selection_multi(fields=["State"])


def _rate_map(rates, title):
    """Build one layered US map coloured by the ``Rate`` column of *rates*.

    The admit and decline maps differ only in their data and title, so both
    are produced by this helper. It reads the module-level ``states``,
    ``click`` and ``custom_palette``.

    Args:
        rates: Data frame with at least ``StateID``, ``State`` and ``Rate``;
            joined to the map geometry on ``id == StateID``.
        title: Chart title.

    Returns:
        The layered chart (coloured states on top of a grey layer for states
        that have no ``Rate`` after the lookup).
    """
    # States with data: coloured by Rate, faded when the lookup found no row.
    existing_states = (
        alt.Chart(states)
        .mark_geoshape(stroke='black')
        .encode(
            color=alt.Color("Rate:Q", scale=alt.Scale(range=custom_palette)),
            tooltip=["State:N", "Rate:Q"],
            opacity=alt.condition('isValid(datum.Rate)', alt.value(1), alt.value(0.2)),
        )
        .transform_lookup(
            lookup="id",
            from_=alt.LookupData(rates, "StateID", list(rates.columns)),
        )
        .properties(width=333, height=200, title=title)
        .add_selection(click)
        .project(type="albersUsa")
        .interactive()
    )

    # Grey layer, visible only where Rate is missing. It has no lookup of its
    # own; the Rate it tests comes from the lookup applied to the layered
    # chart below.
    missing_states = (
        alt.Chart(states)
        .mark_geoshape(fill="grey", stroke="white")
        .encode(opacity=alt.condition("isValid(datum.Rate)", alt.value(0), alt.value(0.2)))
        .add_selection(click)
        .project(type="albersUsa")
    )

    layered = existing_states + missing_states
    return (
        layered.encode(tooltip=["State:N", "Rate:Q"])
        .transform_lookup(
            lookup="id",
            from_=alt.LookupData(rates, "StateID", list(rates.columns)),
        )
        .interactive()
    )


# NOTE(review): Rate, as computed upstream, is each state's share of all rows
# with the given Decision, not a per-state admit/reject ratio — confirm that
# the titles say what is intended.
admit_map = _rate_map(admit_states, "Admission Rates by State")
decline_map = _rate_map(decline_states, "Rejection Rates by State")

# Side-by-side display (rendered when this is the last expression of a
# notebook cell).
admit_map | decline_map

# The next notebook cell was an HTML cell; cell magics are not valid in a
# plain Python file, so its content is preserved as comments:
# %%html
# <img src="../website/images/decision_pairplot.png">